*********************************************************************************
* Task:   Match IMR with air pollution monitor sites and assign TSP/PM10 level to each IMR point      *
* Method: Drop those IMR without any pollution sites within 150 km radius                             *
*         Those IMR with any pollution sites within 25 km radius, use the nearest                     *
*         Those IMR with any pollution sites within 100 km but not within 25km radius,                * 
*               use the weighted (1/distance) avearge                                                 *
*********************************************************************************

*** set path here *** 

#delimit ;  set type double, permanently ; 
clear ;  clear matrix ; clear mata ; 
set matsize 5000 ;  set maxvar 5000 ;   set more off;   set rmsg on ;  pause on;  

tempfile airpollution  ; 

*======================================================================================================================================================;
* SECTION 1 BREAK SECTION 1 BREAK SECTION 1 BREAK SECTION 1 BREAK SECTION 1 BREAK SECTION 1 BREAK  SECTION 1 BREAK  SECTION 1 BREAK  SECTION 1 BREAK   ;
*======================================================================================================================================================;
***Match IMR points and air pollution sites ; 

foreach p in pm10 so2 no2 { ; 

	use $path\EPA_AirPollution_09_11.dta, clear ;    
	save `airpollution', replace ;  
	
	use $path\IMR_sites.dta, clear ;  
	keep code longitude latitude ; 
	ren longitude longitude1 ; 
	ren latitude  latitude1 ; 
	
	cross using `airpollution' ;    
	
	*** interpolation, no extrapolation ; 
	bysort city_id: ipolate `p' year, gen(`p'_int) ;   
	drop if `p'_int==. | `p'_int==0 ;                  
	replace `p'=`p'_int ;  
	
	drop if `p'==. | `p'==0 ;     
	
	keep code year city_en city_id `p' longitude latitude longitude1 latitude1 ; 
	geodist latitude longitude latitude1 longitude1, gen(dist) ;    * distance==0 is perfect match ; 
	sort year code city_id dist ; 
	keep if dist<=100 ;    
	bysort year code: gen np100=_N ; tab np ; 
	
	gen a = dist<=25 ; 
	bysort year code: egen match25=max(a) ; 
	bysort year code: egen c=min(dist) ; 
	drop if match25==1 & c~=dist ; 
	
	bysort year code: egen deno_wgh=sum(1/dist) ; 
	gen weights=(1/dist)/deno_wgh ; 
	replace weights=1 if dist==0 & weights==. ; 
	
	collapse (sum) `p' np100 match25 [pweight=weights], by(code year) ; 
	replace `p'=. if `p'==0 ;     * sum in collapse produces 0s if the IMR/year cell contains only missing values. This is a double fix since "drop if `p'==. | `p'==0" also fix the same problem  ;  
	
	
	label var np100 "# of pollution sites within 150km radius" ; 
	label var match25 "Successful match within 25km radius" ;  
	
	save IMR_`p'_09_11.dta, replace ; 

} ; 


use IMR_pm10_09_11.dta, clear ; 
merge 1:1 code year using IMR_so2_09_11.dta ;  drop _merge ;  
merge 1:1 code year using IMR_no2_09_11.dta ;  drop _merge ;   
drop np100 match25 ;  

* collapse (mean) pm10 so2 no2, by(code) ; 
save IMR_airpollution_match, replace ;    // 09-11 average air pollution level 



